Libraries and data

Load metadata

all_meta <- read_csv(file = here::here('data/drawings/stringent_cleaned_dataset_meta/all_object_metadata_cleaned.csv')) %>%
  as_tibble() %>%
  # filter(age_numeric>2) %>%
  mutate(category = str_split_fixed(category,' ',2)[,2]) %>%
  mutate(category = str_replace(category,' ','.'))  # ice cream
## Parsed with column specification:
## cols(
##   session_id = col_character(),
##   category = col_character(),
##   age = col_character(),
##   num_strokes = col_double(),
##   draw_duration_old = col_double(),
##   draw_duration_new = col_double(),
##   mean_intensity = col_double(),
##   age_numeric = col_double(),
##   filename = col_character()
## )

Basic descriptives of filtered set

num_subs = length(unique(all_meta$session_id))

The final, filtered dataset of N=37770 drawings from 48 categories from 8084 children who were on average 5.3306328years of age (range 3-10 years).

Load frequency ratings

frequency = read_csv(file = here::here('data/surveys/drawing_experience/preprocessed/Category_frequency_survey.csv')) 
## Parsed with column specification:
## cols(
##   childs_age = col_double(),
##   subject_id = col_character(),
##   category = col_character(),
##   often_drawn_rating = col_double()
## )

To assess this, 51 parents of children aged 3-10 years filled out a survey asking about the frequency with with their children drew the categories in the dataset.

count_by_age <- frequency %>%
  group_by(childs_age) %>%
  dplyr::summarize(num_surveys = length(unique(subject_id)))
## `summarise()` ungrouping output (override with `.groups` argument)
freq_by_category <- frequency %>%
  # filter(childs_age > 2) %>%
  mutate(category = str_split_fixed(category,' ',2)[,2]) %>%
  mutate(category = str_replace(category,' ','.')) %>%
  filter(category %in% all_meta$category) %>%
  group_by(category) %>%
  summarize(drawing_frequency = mean(often_drawn_rating)) %>%
  mutate(above_median_freq = drawing_frequency > median(drawing_frequency)) 
## `summarise()` ungrouping output (override with `.groups` argument)
# write_csv(freq_by_category, here::here('data/surveys/drawing_experience/preprocessed/freq_by_category.csv'))

Load tracing data

all_tracings <- read_csv(here('data/tracing/rated_all_museumstation_filtered.csv'))%>%
  select(-X1, -X)
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   X = col_double(),
##   session_id = col_character(),
##   age = col_double(),
##   category = col_character(),
##   pre_tran = col_double(),
##   post_tran = col_double(),
##   rotate = col_double(),
##   translate = col_double(),
##   scale = col_double(),
##   has_ref = col_logical(),
##   rating = col_double()
## )
## Make averages for joining
by_subject_tracing_avg <- all_tracings %>%
  distinct(session_id, category, age, rating) %>% 
  group_by(session_id) %>%
  summarize(avg_tracing_rating = mean(rating))
## `summarise()` ungrouping output (override with `.groups` argument)

Load animacy/size (hardcoding)

animacy_csv <- read_csv(here::here('data/drawings/category_metadata/animacy.csv')) %>%
  as_tibble() %>%
  mutate(animacy_size = case_when(animacy == '0' & size=='0' ~ 2,
                                  animacy == '0' & size=='1' ~ 1,
                                  animacy == '1' & size=='0' ~ 3,
                                  animacy == '1' & size=='1' ~ 4))
## Parsed with column specification:
## cols(
##   animacy = col_double(),
##   size = col_double(),
##   category = col_character(),
##   vehicle = col_double()
## )

Load classification data

num_batches=232
reg_string = 'C_0.1_T_0.1'
classification_data <- read.csv(here::here('data','compiled_classifications/',paste0(reg_string, 'batchtotal_',as.character(num_batches),'.csv'))) %>%
  mutate(session_id = paste('cdm_',session_id,sep="")) %>%
  mutate(age_numeric = age) %>%
  mutate(age = paste('age',age,sep="")) %>%
  mutate(age = as.factor(age)) %>%
  mutate(category = target_label) %>%
  mutate(image_name = paste(target_label,'_sketch_', age,'_', session_id,'.png',sep="")) %>%
  select(-X)  %>%
  mutate(category = str_replace(category,' ','.'))  # ice cream = ice.cream

Join meta data, drawing frequnecy, and classification data

d <- classification_data %>%
  mutate(correct_or_not = as.logical(correct_or_not)) %>%
  gather(key = 'class', value = 'prob', contains('prob')) %>%
  mutate(class = str_split_fixed(class, '_prob',2)[,1]) %>%
  group_by(image_name, age, category, correct_or_not, session_id, age_numeric) %>%
  summarize(denom = sum(prob), target_label_prob = prob[class==category], log_odds = log(target_label_prob / (denom - target_label_prob))) %>%
  rename(filename = image_name) %>%
  left_join(all_meta, by=c("filename", "category", "age_numeric","session_id")) %>%
  mutate(draw_duration = draw_duration_old) %>%
  mutate(run = substr(session_id,0,10)) %>%
  left_join(freq_by_category)
## `summarise()` regrouping output by 'image_name', 'age', 'category', 'correct_or_not', 'session_id' (override with `.groups` argument)
## Joining, by = "category"
d <- d %>%
  left_join(by_subject_tracing_avg)
## Joining, by = "session_id"

Asserts to check all the joins

# weird things were happening with category matching, check
assert_that(length(unique(d$filename)) == length(unique(classification_data$image_name)))
## [1] TRUE
# every drawing should have all of these, regardless
assert_that(sum(is.na(d$age_numeric))==0)
## [1] TRUE
assert_that(sum(is.na(d$category))==0)
## [1] TRUE
assert_that(sum(is.na(d$correct_or_not))==0)
## [1] TRUE
missing_meta <- d %>%
  filter(is.na(num_strokes))

assert_that(length(missing_meta$filename)==0)
## [1] TRUE

Basic descriptives

Tracing sanity check

Look at tracing estimates by age and how they are correlated - nice!

by_subject_tracing <- all_tracings %>%
  group_by(session_id) %>%
  distinct(session_id, category, age, rating) %>%
  mutate(count_both = n())  %>%
  filter(count_both == 2) %>%
  spread(category, rating)

ggplot(by_subject_tracing, aes(x=shape, y=square)) +
  geom_jitter(width=.2, height=.2, alpha=.2) + 
  facet_grid(~age) 

tracing_cor = cor.test(by_subject_tracing$square, by_subject_tracing$shape)

Within participants who had valid tracing trials for both shapes (N=6756), tracing scores for each of the two shapes were reasonably correlated (r=0.6018196, t=61.9298676, P<.001), despite the irregular shape being notably harder to trace than the square.

In order to estimate these weights, we collected quality ratings from adult observers (\(N\)=70) for 1325 tracings (i.e., 50-80 tracings per shape per age), each of which was rated 1-5 times. Raters were instructed to evaluate “how well the tracing matches the target shape and is aligned to the position of the target shape” on a 5-point scale.

We fit an ordinal regression mixed-effects model to predict these 5-point ratings, which contained correlation distance, translation, rotation, scaling, and shape identity (square vs. star) as predictors, with random intercepts for rater. This model yielded parameter estimates that could then be used to score each tracing in the dataset (N=14372 tracings from 7612 children). We averaged scores within session to yield a single tracing score for each participant (XX children completed at least one tracing trial).

Classification accuracy, effort, tracing across age

### How do our covariates change with age? Compute means and CIs; Group by age/category

## first summarize data  
cor_by_age <- d %>%
  group_by(age_numeric,category) %>%
  summarize(avg_cor = mean(correct_or_not)) %>%
  group_by(age_numeric) %>%
  multi_boot_standard(col = "avg_cor")  
## `summarise()` regrouping output by 'age_numeric' (override with `.groups` argument)
# cor_by_age_by_session <- d %>%
#   group_by(session_id, age_numeric) %>%
#   summarize(avg_cor = mean(correct_or_not)) %>%
#   group_by(age_numeric) %>%
#   multi_boot_standard(col = "avg_cor")  

draw_duration <- d %>%
  group_by(age_numeric,category) %>%
  summarize(avg_draw_duration = mean(draw_duration)) %>%
  group_by(age_numeric) %>%
  multi_boot_standard(col = "avg_draw_duration")
## `summarise()` regrouping output by 'age_numeric' (override with `.groups` argument)
num_strokes <- d %>%
  group_by(age_numeric,category) %>%
  summarize(avg_num_strokes = mean(num_strokes)) %>%
  group_by(age_numeric) %>%
  multi_boot_standard(col = "avg_num_strokes") 
## `summarise()` regrouping output by 'age_numeric' (override with `.groups` argument)
avg_intensity <- d %>%
  group_by(age_numeric,category) %>%
  summarize(avg_intensity = mean(mean_intensity)) %>%
  group_by(age_numeric) %>%
  multi_boot_standard(col = "avg_intensity")
## `summarise()` regrouping output by 'age_numeric' (override with `.groups` argument)
tracing_scores <- d %>%
  distinct(session_id,age_numeric,avg_tracing_rating) %>%
  filter(!is.na(avg_tracing_rating)) %>%
  group_by(age_numeric) %>%
  multi_boot_standard(col = "avg_tracing_rating")

Plot separately classification, effort, tracing by age

## Make compiled plot of descriptives
base_size_chosen=12 # size of text in plots
smooth_alpha=.2

cor_by_age_plot_A = ggplot(cor_by_age, aes(age_numeric,mean, color=age_numeric)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) + 
  theme_few(base_size = base_size_chosen) + 
  labs(x='Age', y='Classification accuracy') +
  scale_color_viridis(option="D") + 
  theme(legend.position = "none") + 
  geom_smooth(col='grey', alpha=smooth_alpha) +
  ylim(0,.75) + 
  geom_hline(yintercept = 1/48, linetype="dashed", color="grey") +
 ggtitle('A')

p1=ggplot(draw_duration, aes(age_numeric,mean, color=age_numeric)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  theme_few(base_size = base_size_chosen) +
  labs(x='Age', y='Drawing duration (s)') +
  scale_color_viridis(option="D") +
  theme(legend.position = "none") + 
  ylim(0,15) + 
  geom_smooth(col='grey', span = 10) +
  ggtitle('B')

p2=ggplot(avg_intensity, aes(age_numeric,mean, color=age_numeric)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  theme_few(base_size = base_size_chosen) +
  labs(x='Age', y='Ink used (mean intensity)') +
  scale_color_viridis(option="D") +
  theme(legend.position = "none") + 
  ylim(.02,.06) + 
  geom_smooth(col='grey', span = 10,alpha=smooth_alpha)  +
  ggtitle('C')

p3=ggplot(num_strokes, aes(age_numeric,mean, color=age_numeric)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  theme_few(base_size = base_size_chosen) +
  labs(x='Age', y='Number of strokes') +
  scale_color_viridis(option="D") +
  theme(legend.position = "none") +
  ylim(0,20) + 
  geom_smooth(col='grey', span = 10,alpha=smooth_alpha)  +
  ggtitle('D')
        
p4=ggplot(tracing_scores, aes(age_numeric,mean, color=age_numeric)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  theme_few(base_size = base_size_chosen) +
  labs(x='Age', y='Estimated tracing score') +
  scale_color_viridis(option="D") +
  theme(legend.position = "none") +
  ylim(0,4) +
  geom_smooth(col='grey', span = 10,alpha=smooth_alpha)  +
  ggtitle('E')
ggarrange(cor_by_age_plot_A,p1,p2,p3,p4, nrow=1)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggsave('figures/mainResults.pdf',width=7.5, height=3, units='in')
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Render out examples of low ink, time, strokes

Plot 1: How does classification accuracy interact with age x effort & tracing ?

num_bins = 3

cor_by_age_by_strokes <- d %>%
  ungroup() %>%
  mutate(bin = ntile(num_strokes, num_bins)) %>%
  group_by(bin, age_numeric,category) %>%
  summarize(avg_cor = mean(correct_or_not)) %>%
  group_by(age_numeric,bin) %>%
  multi_boot_standard(col = "avg_cor")   %>%
  mutate(covariate = 'by strokes drawn')
## `summarise()` regrouping output by 'bin', 'age_numeric' (override with `.groups` argument)
cor_by_age_by_time <- d %>%
  ungroup() %>%
  mutate(bin = ntile(draw_duration, num_bins)) %>%
  group_by(bin, age_numeric,category) %>%
  summarize(avg_cor = mean(correct_or_not)) %>%
  group_by(age_numeric,bin) %>%
  multi_boot_standard(col = "avg_cor")  %>%
  mutate(covariate = 'by time spent')
## `summarise()` regrouping output by 'bin', 'age_numeric' (override with `.groups` argument)
cor_by_age_by_intensity <- d %>%
  ungroup() %>%
  mutate(bin = ntile(mean_intensity, num_bins)) %>%
  group_by(bin, age_numeric,category) %>%
  summarize(avg_cor = mean(correct_or_not)) %>%
  group_by(age_numeric,bin) %>%
  multi_boot_standard(col = "avg_cor")   %>%
  mutate(covariate = 'by ink used')
## `summarise()` regrouping output by 'bin', 'age_numeric' (override with `.groups` argument)
cor_by_age_by_tracing <- d %>%
  ungroup() %>%
  filter(!is.na(avg_tracing_rating)) %>%
  mutate(bin = ntile(avg_tracing_rating, num_bins)) %>%
  group_by(bin, age_numeric,category) %>%
  summarize(avg_cor = mean(correct_or_not)) %>%
  group_by(age_numeric,bin) %>%
  multi_boot_standard(col = "avg_cor")   %>%
  mutate(covariate = 'by tracing ability')
## `summarise()` regrouping output by 'bin', 'age_numeric' (override with `.groups` argument)
merged <- cor_by_age_by_intensity %>%
  full_join(cor_by_age_by_strokes) %>%
  full_join(cor_by_age_by_time) %>%
  full_join(cor_by_age_by_tracing) %>%
  # filter(age_numeric > 2) %>%
  mutate(bin_name = as.numeric(bin))
## Joining, by = c("age_numeric", "bin", "ci_lower", "ci_upper", "mean", "covariate")
## Joining, by = c("age_numeric", "bin", "ci_lower", "ci_upper", "mean", "covariate")
## Joining, by = c("age_numeric", "bin", "ci_lower", "ci_upper", "mean", "covariate")
  # mutate(bin_name = case_when(bin == 1 ~ "Low",
  #                             bin == 2 ~ "Medium",
  #                             bin == 3 ~ "High"))


ggplot(merged, aes(age_numeric,mean*100, color=bin_name, group=bin_name, col=bin_name)) +
  geom_pointrange(aes(ymin = ci_lower*100, ymax = ci_upper*100), alpha=.6, size=.25) +
  theme_few(base_size = 10) +
  labs(x='Age of child drawing (yrs)', y='Category classification \n accuracy') +
  # scale_color_viridis(option="C", begin=.2, end=.8, discrete=TRUE, name = 'Effort') +
  scale_x_continuous(breaks=seq(2,10,1)) +
  geom_smooth(span=10, alpha=smooth_alpha, size=.4) +
  ylim(0,100) +
  geom_hline(yintercept = 1/48, linetype="dashed", color="grey") +
  theme(legend.position ='none',aspect.ratio = 1) +
  scale_fill_discrete(labels=c('Low','Medium','High')) + 
  # theme(aspect.ratio = 1, legend.position = c(.08, .75),legend.text = element_text(size=6),legend.title = element_text(size=8),legend.background =   element_rect(fill=alpha('white', 0))) +
  facet_grid(~covariate)
## Warning: Duplicated aesthetics after name standardisation: colour
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggsave('figures/visuomotor_control_wide.pdf', units='in')
## Saving 7 x 5 in image
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
cor_by_age_by_tracing_only <- d %>%
  ungroup() %>%
  filter(!is.na(avg_tracing_rating)) %>%
  mutate(bin = ntile(avg_tracing_rating, 3)) %>%
  group_by(bin, age_numeric,category) %>%
  summarize(avg_cor = mean(correct_or_not)) %>%
  group_by(age_numeric,bin) %>%
  multi_boot_standard(col = "avg_cor")  
## `summarise()` regrouping output by 'bin', 'age_numeric' (override with `.groups` argument)
ggplot(cor_by_age_by_tracing_only, aes(age_numeric,mean*100, color=bin, group=bin, col=bin)) +
  geom_pointrange(aes(ymin = ci_lower*100, ymax = ci_upper*100), alpha=.6, size=.25) +
  theme_few(base_size = 14) +
  labs(x='Age of child drawing (yrs)', y='Category classification \n accuracy') +
  scale_x_continuous(breaks=seq(2,10,1)) +
  geom_smooth(span=10, alpha=smooth_alpha, size=.4) +
  ylim(0,80) +
  geom_hline(yintercept = 1/48, linetype="dashed", color="grey") +
  theme(legend.position ='none',aspect.ratio = 1) 
## Warning: Duplicated aesthetics after name standardisation: colour
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

  # scale_fill_discrete(labels=c('Low','Medium','High')) + 
  # theme(aspect.ratio = 1, legend.position = c(.08, .75),legend.text = element_text(size=6),legend.title = element_text(size=8),legend.background =   element_rect(fill=alpha('white', 0))) 
tracing_scores_raw <- d %>%
  distinct(session_id,age_numeric,avg_tracing_rating) %>%
  filter(!is.na(avg_tracing_rating))

tracing_scores <- d %>%
  distinct(session_id,age_numeric,avg_tracing_rating) %>%
  filter(!is.na(avg_tracing_rating)) %>%
  group_by(age_numeric) %>%
  multi_boot_standard(col = "avg_tracing_rating")

      
ggplot(tracing_scores, aes(age_numeric,mean, color=age_numeric)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  geom_jitter(height=.1, width=.4, data=tracing_scores_raw, aes(y=avg_tracing_rating, x=age_numeric), alpha=.02) +
  theme_few(base_size = 14) +
  labs(x='Age', y='Estimated tracing score') +
  scale_color_viridis(option="D") +
  scale_x_continuous(breaks=seq(2,10,1)) +
  theme(legend.position = "none") +
  ylim(0,4) +
  geom_smooth(col='grey', span = 10,alpha=smooth_alpha) 
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1034 rows containing missing values (geom_point).

cor_by_session <- d %>%
  group_by(age_numeric,session_id) %>%
  # filter(age_numeric >2) %>%
  dplyr::summarize(mean = mean(correct_or_not), num_drawings = n()) %>%
  group_by(age_numeric)
## `summarise()` regrouping output by 'age_numeric' (override with `.groups` argument)
base_size_chosen=10 

cor_by_category <- d %>%
  group_by(age_numeric,category) %>%
  # filter(age_numeric >2) %>%
  dplyr::summarize(mean = mean(correct_or_not), num_drawings = n()) %>%
  group_by(age_numeric)
## `summarise()` regrouping output by 'age_numeric' (override with `.groups` argument)
base_size_chosen=10 
base_size_chosen=10
smooth_alpha=.01
ggplot(cor_by_age, aes(age_numeric,mean*100, col=age_numeric)) +  
  geom_jitter(data=cor_by_category, width=.1, height=0, alpha=.2) +
  geom_pointrange(aes(y=mean*100, ymin = ci_lower*100, ymax = ci_upper*100)) +
  geom_smooth(alpha=smooth_alpha, color='grey', span=10) +
  theme_few(base_size = base_size_chosen) + 
  labs(x='Age of child drawing (yrs)', y='Drawing classification accuracy') +
  scale_x_continuous(breaks = seq(2,10,1)) + 
  theme(legend.position = "none", aspect.ratio = 1) +
  scale_color_viridis(option="D", breaks=seq(2,10,1)) +
  geom_hline(yintercept = 1/48, linetype="dashed", color="grey") 
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggsave('figures/cor_by_category.pdf', width=3, height=3, units='in')
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(cor_by_age, aes(age_numeric,mean, col=age_numeric)) +  
  geom_jitter(data=cor_by_session, width=.1, height=.05, alpha=.01) +
  geom_pointrange(aes(y=mean, ymin = ci_lower, ymax = ci_upper)) +
  geom_smooth(alpha=smooth_alpha, color='grey', span=10) +
  theme_few(base_size = base_size_chosen) + 
  labs(x='Age', y='Classification accuracy') +
  scale_x_continuous(breaks = seq(3,10,1)) + 
  theme(legend.position = "none", aspect.ratio = 1) +
  scale_color_viridis(option="D", breaks=seq(3,10,1)) +
  geom_hline(yintercept = 1/48, linetype="dashed", color="grey") 
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

ggsave('figures/cor_by_session.pdf', width=3, height=3, units='in')
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# ggplot(cor_by_session %>% filter(age_numeric > 2), aes(x=as.factor(age_numeric),y=mean, col=age_numeric, size=num_drawings)) +
#   geom_flat_violin() +
#   geom_point(position = position_jitter(width=.15, height = .01), size = .25)+
#   theme_cowplot()+
#   guides(fill = FALSE, colour = FALSE) 
# 
# 
#   # geom_jitter(data=cor_by_session, width=.1, height=.1, alpha=.01, aes(size=num_drawings)) +
#   # geom_pointrange(aes(y=mean, ymin = ci_lower, ymax = ci_upper)) +
#   geom_smooth(alpha=smooth_alpha, color='grey') +
#   theme_few(base_size = base_size_chosen) + 
#   labs(x='Age', y='Classification accuracy') +
#   scale_color_viridis(option="D") +
#   theme(legend.position = "none", aspect.ratio = 1) +
#   ylim(0,.75) + 
#   geom_hline(yintercept = 1/48, linetype="dashed", color="grey") 
cor_by_category_by_age <- d %>%
  group_by(age_numeric,category) %>%
  summarize(avg_log_odds = mean(log_odds), num_drawings = n()) %>%
  left_join(freq_by_category) %>%
  mutate(category = fct_reorder(category, drawing_frequency, .desc=TRUE))
## `summarise()` regrouping output by 'age_numeric' (override with `.groups` argument)
## Joining, by = "category"
ggplot(cor_by_category_by_age, aes(age_numeric,avg_log_odds, color=drawing_frequency, size=num_drawings)) +
  geom_point(alpha=.5) + 
  theme_few(base_size = base_size_chosen) + 
  labs(x='Age', y='Log odds') +
  scale_color_viridis(option="A") +
  theme(legend.position = "none") +
  geom_smooth(span=10, alpha=smooth_alpha) +
  # ylim(0,1) + 
  geom_hline(yintercept = 1/48, linetype="dashed", color="grey") +
  facet_wrap(~category, nrow=6)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Variance by category

Extra plot: Average correct with CIs by category and age

cor_by_category_by_age <- d %>%
  filter(age_numeric > 2) %>%
  group_by(session_id, age_numeric,category) %>%
  dplyr::summarize(avg_cor = mean(correct_or_not)) %>%
  group_by(age_numeric, category) %>%
  multi_boot_standard(col = 'avg_cor') %>%
  ungroup () %>%
  left_join(freq_by_category) %>%
  mutate(category = fct_reorder(category, drawing_frequency))
## `summarise()` regrouping output by 'session_id', 'age_numeric' (override with `.groups` argument)
## Joining, by = "category"
ggplot(cor_by_category_by_age, aes(age_numeric,mean, color=drawing_frequency)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), position=position_dodge(width=.2), alpha=.8) +
  theme_few(base_size = base_size_chosen) +
  labs(x='Age', y='Proportion correct') +
  scale_color_viridis(option="A", begin=.4, end=.8) +
  theme(legend.position = "none") +
  geom_smooth(span=10, alpha=smooth_alpha) +
  # ylim(0,1) +
  geom_hline(yintercept = 1/48, linetype="dashed", color="grey") +
  facet_wrap(~category, nrow=6)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Extra plot: Classification accuracy by more or less frequently drawn

# first summarize data
cor_by_age_low_freq <- d %>%
  group_by(above_median_freq, age_numeric,category) %>%
  summarize(avg_cor = mean(correct_or_not)) %>%
  group_by(age_numeric, above_median_freq) %>%
  multi_boot_standard(col = "avg_cor")
## `summarise()` regrouping output by 'above_median_freq', 'age_numeric' (override with `.groups` argument)
ggplot(cor_by_age_low_freq, aes(age_numeric,mean, color=above_median_freq)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper)) +
  theme_few(base_size = base_size_chosen) +
  labs(x='Age', y='Classification accuracy') +
  # scale_color_viridis(option="D") +
  theme(legend.position = "none") +
  geom_smooth(col='grey',span=10, alpha=smooth_alpha) +
  ylim(0,.75) +
  geom_hline(yintercept = 1/48, linetype="dashed", color="grey")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

What about for correct drawing only?

Plot 2: Changes in log-odds by age group for correctly classified drawings only

lo_correct_category_by_age <- d %>%
  # filter(age_numeric > 2) %>%
  filter(correct_or_not==1) %>%
  mutate(category = str_replace(category,'ice.cream','ice cream')) %>%  # ice.cream -> ice_cream
  mutate(age = cut(age_numeric, c(1.9, 5, 8,10.1), labels = c("2-4","5-7","8-10"))) %>%
  group_by(session_id, age,category) %>%
  summarize(avg_cor = mean(log_odds), num_drawings = n()) %>%
  group_by(age, category) %>%
  multi_boot_standard(col = 'avg_cor') %>%
  ungroup () %>%
  mutate(category = fct_reorder(category, mean)) 
## `summarise()` regrouping output by 'session_id', 'age' (override with `.groups` argument)
num_classes=48
chance = log(1/num_classes) - log ((num_classes - 1)/num_classes)

base_size_chosen=10
ggplot(lo_correct_category_by_age, aes(x = category, y = mean, col = age)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), alpha=.8, size=.2) +
  coord_flip() +
  theme_few(base_size = base_size_chosen) + 
  labs(y = "Classifier evidence (log odds)", x = "") +
  scale_color_viridis(discrete=TRUE, begin=0, end=.8, name = "Age group") +
  theme(legend.position = c(.8,.25), axis.text.y = element_text(size=5), legend.text = element_text(size=8), legend.background =  element_rect(fill=alpha('white', 0)), aspect.ratio=1) 
## Warning: Removed 1 rows containing missing values (geom_segment).

ggsave("figures/log_odds_by_category.pdf", units = 'in', height= 3)
## Saving 7 x 3 in image
## Warning: Removed 1 rows containing missing values (geom_segment).
lo_correct_category_by_age_small <- lo_correct_category_by_age %>%
  group_by(category) %>%
  mutate(category_avg = mean(mean)) %>%
  filter(category_avg > -1)
  

ggplot(lo_correct_category_by_age_small, aes(x = category, y = mean, col = age)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), alpha=.8, size=.2) +
  coord_flip() +
  theme_few(base_size = 12) + 
  labs(y = "Classifier evidence (log odds)", x = "") +
  scale_color_viridis(discrete=TRUE, begin=0, end=.8, name = "Age group") +
  theme(legend.position = 'none') 

  # theme(legend.position = c(.8,.25),legend.text = element_text(size=8), legend.background =  element_rect(fill=alpha('white', 0)))

ggsave("figures/log_odds_by_category_small.pdf", width = 3, height = 3, units = 'in')
log_odds_by_age_cor_only <- d %>%
  filter(correct_or_not==1) %>%
  group_by(age_numeric,category) %>%
  summarize(avg_log_odds = mean(log_odds)) %>%
  group_by(age_numeric) %>%
  multi_boot_standard(col = "avg_log_odds") 
## `summarise()` regrouping output by 'age_numeric' (override with `.groups` argument)
log_odds_by_age_cor_only_by_category <- d %>%
  filter(correct_or_not==1) %>%
  group_by(age_numeric,category) %>%
  summarize(mean = mean(log_odds)) 
## `summarise()` regrouping output by 'age_numeric' (override with `.groups` argument)
base_size_chosen=8
ggplot(log_odds_by_age_cor_only, aes(age_numeric,mean, col=age_numeric)) +  
  # geom_jitter(data=log_odds_by_age_cor_only_by_category, width=.1, height=0, alpha=.3) +
  geom_pointrange(aes(y=mean, ymin = ci_lower, ymax = ci_upper)) +
  geom_smooth(alpha=smooth_alpha, color='grey', span=10) +
  theme_few(base_size = base_size_chosen) + 
  labs(x='Age', y='Classification evidence') +
  scale_x_continuous(breaks = seq(2,10,1)) + 
  theme(legend.position = "none", aspect.ratio = 1) +
  scale_color_viridis(option="D", breaks=seq(2,10,1)) 
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

  # geom_hline(yintercept = 1/48, linetype="dashed", color="grey") 


ggsave('figures/cor_only_log_odds_by_category.pdf', width=3, height=3, units='in')
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Confusions for misclassified draiwngs

Extra plot: Visualize probability assigned to other cateogries for “dog”, which is poorly classified…many animals

dog_probs <- classification_data %>%
  select(-image_name, -batch, -batch_str, -X.1, -index,   -age, -target_label,-session_id, -correct_or_not, -age_numeric) %>%
  filter(category == 'dog') %>%
  gather(key = category, value = prob) %>%
  group_by(category) %>%
  mutate(mean_prob = mean(prob)) %>%
  ungroup() %>%
  mutate(category = fct_reorder(category, mean_prob, .desc=TRUE))

ggplot(dog_probs, aes(x=category, y=prob)) +
  geom_boxplot(alpha=.2) +
  theme_few() + 
  theme(axis.text.x = element_text(angle = 90))  

Make long form classification data

classification_data_long <- classification_data %>%
  mutate(correct_or_not = as.logical(correct_or_not)) %>%
  gather(key = 'class', value = 'prob', contains('prob')) %>%
  mutate(class = str_split_fixed(class, '_prob',2)[,1]) 

Calculate avg prob by category for misclassified

confusions_by_age <- classification_data_long %>%
  mutate(drawn_category = category) %>%
  left_join(animacy_csv) %>%
  ungroup() %>%
  filter(correct_or_not==0) %>%
  filter(age_numeric > 2) %>%
  mutate(drawn_category = fct_reorder(drawn_category, animacy_size)) %>%
  mutate(class = factor(class, levels = levels(drawn_category))) %>%
  mutate(age_group = cut(age_numeric, c(2.9, 5, 7,10.1), labels = c("3-4","5-6","7-10"))) %>%
  group_by(age_group, age_numeric, drawn_category, class) %>%
  dplyr::summarize(mean_prob = mean(prob)) 
## Joining, by = "category"
## `summarise()` regrouping output by 'age_group', 'age_numeric', 'drawn_category' (override with `.groups` argument)
confusions_by_class <- classification_data_long %>%
  mutate(drawn_category = category) %>%
  left_join(animacy_csv) %>%
  ungroup() %>%
  filter(correct_or_not==0) %>%
  # filter(age_numeric > 2) %>%
  mutate(drawn_category = fct_reorder(drawn_category, animacy_size)) %>%
  mutate(class = factor(class, levels = levels(drawn_category))) %>%
  # mutate(age_group = cut(age_numeric, c(2.9, 5, 7,10.1), labels = c("3-4","5-6","7-10"))) %>%
  group_by(drawn_category, class) %>%
  dplyr::summarize(mean_prob = mean(prob)) 
## Joining, by = "category"
## `summarise()` regrouping output by 'drawn_category' (override with `.groups` argument)
levels(confusions_by_age$drawn_category)
##  [1] "airplane"  "bed"       "bike"      "boat"      "cactus"   
##  [6] "car"       "chair"     "couch"     "house"     "piano"    
## [11] "train"     "tree"      "TV"        "apple"     "book"     
## [16] "bottle"    "bowl"      "clock"     "cup"       "hat"      
## [21] "ice.cream" "key"       "lamp"      "mushroom"  "phone"    
## [26] "scissors"  "watch"     "bee"       "bird"      "cat"      
## [31] "dog"       "face"      "fish"      "frog"      "hand"     
## [36] "rabbit"    "snail"     "spider"    "bear"      "camel"    
## [41] "cow"       "elephant"  "horse"     "octopus"   "person"   
## [46] "sheep"     "tiger"     "whale"
sanity <- confusions_by_class %>%
  group_by(drawn_category) %>%
  summarise(sum_prob = sum(mean_prob))
## `summarise()` ungrouping output (override with `.groups` argument)
confusions_count_by_class <- classification_data_long %>%
  mutate(drawn_category = category) %>%
  left_join(animacy_csv) %>%
  ungroup() %>%
  group_by(image_name) %>%
  filter(correct_or_not==0) %>%
  filter(prob == max(prob)) %>%
  group_by(drawn_category) %>%
  mutate(count_category = n()) %>%
  ungroup() %>%
  group_by(drawn_category, class) %>%
  summarise(confusions = n(), count_category = count_category[1], animacy_size = animacy_size[1])  %>%
  mutate(prop_confusions = confusions / count_category)  %>%
  ungroup() %>%
  mutate(drawn_category = fct_reorder(drawn_category, animacy_size)) %>%
  mutate(class = factor(class, levels = levels(drawn_category)))
## Joining, by = "category"
## `summarise()` regrouping output by 'drawn_category' (override with `.groups` argument)

Plot 3: Plot confusions by class across all ages

ggplot(data = confusions_count_by_class, aes(x=class, y=drawn_category,fill=prop_confusions)) + 
  geom_tile() + 
  theme_few(base_size = 12) +
  theme(legend.position = 'left', axis.text.x = element_text(angle = 90,  
  size = 6, vjust=.5, hjust = 1), axis.text.y = element_text(angle = 0,
  size = 6, hjust = 1)) + 
  coord_fixed() + 
  scale_fill_viridis(option="A", name = 'Proportion classified')  +
  ylab('Drawn as') +
  xlab('Confused with') 

  # facet_wrap(~age_group, nrow=1)

ggsave('figures/classifier_confusions_count.pdf', width = 4, height = 5, units = 'in')
ggplot(data = confusions_by_class, aes(x=class, y=drawn_category,fill=mean_prob)) + 
  geom_tile() + 
  theme_few(base_size = 12) +
  theme(legend.position = 'left', axis.text.x = element_text(angle = 90,  
  size = 6, vjust=.5, hjust = 1), axis.text.y = element_text(angle = 0,
  size = 6, hjust = 1)) + 
  coord_fixed() + 
  scale_fill_viridis(option="A", limits=c(0,quantile(confusions_by_age$mean_prob,.998)), name = 'Classifier probability')  +
  ylab('Drawn as') +
  xlab('Confused with') 

  # facet_wrap(~age_group, nrow=1)

ggsave('figures/classifier_confusions.pdf', width = 4, height = 5, units = 'in')

Calculate whether animacy/size were correct

animacy_size_acc_by_age <- classification_data_long %>%
  mutate(drawn_category = category) %>%
  left_join(animacy_csv %>% select(animacy, size, category), by = c("drawn_category" = "category")) %>%
  ungroup() %>%
  rename(drawn_category_animacy = animacy, drawn_category_size = size) %>%
  left_join(animacy_csv %>% select(animacy, size, category),  by = c("class" = "category")) %>%
  rename(class_animacy = animacy, class_size = size) %>%
  group_by(image_name, drawn_category, age_numeric) %>%
  mutate(max_prob = max(prob), top_class = prob==max_prob)  %>%
  filter(top_class==TRUE) %>%
  mutate(animacy_correct = (drawn_category_animacy == class_animacy)) %>%
  mutate(size_correct = (drawn_category_size == class_size)) 
# anim_cor_by_category_by_age <- animacy_size_acc_by_age %>%
#   filter(correct_or_not==0) %>%
#   group_by(age_numeric,drawn_category, drawn_category_animacy) %>%
#   summarize(avg_animacy_correct = mean(animacy_correct), num_drawings = n())  %>%
#   ungroup() %>%
#   mutate(drawn_category = fct_reorder(drawn_category, drawn_category_animacy)) 
# 
# ggplot(anim_cor_by_category_by_age, aes(age_numeric,avg_animacy_correct, color=drawn_category_animacy, size=num_drawings)) +
#   geom_point(alpha=.5) + 
#   theme_few(base_size = base_size_chosen) + 
#   labs(x='Age', y='Proportion animacy correct') +
#   theme(legend.position = "none") +
#   geom_smooth(span=10, alpha=smooth_alpha) +
#   ylim(0,1) +
#   geom_hline(yintercept = .43, linetype="dashed", color="grey") +
#   facet_wrap(~drawn_category, nrow=6)

Calculate animacy correct relative to basline

baseline_animacy <- mean(animacy_csv$animacy)
baseline_objects <- 1 - mean(animacy_csv$animacy)

anim_cor_by_category_by_age <- animacy_size_acc_by_age %>%
  filter(correct_or_not==0) %>%
  mutate(drawn_category_animacy = as.factor(drawn_category_animacy))  %>%
  mutate(baseline_chance = case_when(drawn_category_animacy == 0 ~ baseline_objects, 
                                     drawn_category_animacy == 1 ~ baseline_animacy)) %>%
  group_by(age_numeric, drawn_category) %>%
  dplyr::summarize(avg_animacy_correct = mean(animacy_correct) - baseline_chance) %>%
  distinct(age_numeric, avg_animacy_correct, drawn_category)
## `summarise()` regrouping output by 'age_numeric', 'drawn_category' (override with `.groups` argument)
anim_cor_by_age <- anim_cor_by_category_by_age %>%
  group_by(age_numeric) %>%
  multi_boot_standard(col = 'avg_animacy_correct')
base_rate = .5

diffs=array()
for (c in 1:48){
  rands = sample(0:1,100, replace=TRUE)
  diff = mean(rands) - base_rate
  diffs[c]=diff
}

Plot 4: Plot animacy correct by age

ggplot(anim_cor_by_age, aes(x=age_numeric,y=mean, color=age_numeric)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), alpha=.8, size=.2) + 
  geom_point(data=anim_cor_by_category_by_age, aes(x=age_numeric, y=avg_animacy_correct), alpha=.1, size=.5) +
  theme_few(base_size = 8) + 
  labs(x='Age', y='Prop. animacy correct') +
  theme(legend.position = "none") +
  # geom_smooth(span=10, alpha=smooth_alpha, color='grey') +
  scale_color_viridis(option="D", discrete=FALSE) +
  geom_hline(yintercept =  0 , linetype="dashed", color="grey") 

ggsave('figures/animacy_classification.pdf', width = 1.5, height = 1.5, units = 'in')
objects_only <- animacy_csv %>%
  filter(animacy==0)

baseline_big <- mean(objects_only$size)
baseline_small <- 1 - mean(objects_only$size)


size_cor_by_category_by_age <- animacy_size_acc_by_age %>%
  filter(correct_or_not==0) %>%
  filter(drawn_category_animacy==0) %>% #only objects
  mutate(drawn_category_size = as.factor(drawn_category_size))  %>%
  mutate(baseline_chance = case_when(drawn_category_size == 0 ~ baseline_small, 
                                     drawn_category_size == 1 ~ baseline_big)) %>%
  group_by(age_numeric, drawn_category) %>%
  dplyr::summarize(avg_size_correct = mean(size_correct) - baseline_chance) %>%
  distinct(age_numeric, avg_size_correct, drawn_category)
## `summarise()` regrouping output by 'age_numeric', 'drawn_category' (override with `.groups` argument)
size_cor_by_age <- size_cor_by_category_by_age %>%
  group_by(age_numeric) %>%
  multi_boot_standard(col = 'avg_size_correct')
## Plot 4: Plot animacy correct by age

ggplot(size_cor_by_age, aes(x=age_numeric,y=mean, color=age_numeric)) +
  geom_pointrange(aes(ymin = ci_lower, ymax = ci_upper), alpha=.8, size=.2) + 
  geom_point(data=size_cor_by_category_by_age, aes(x=age_numeric, y=avg_size_correct), alpha=.1, size=.5) +
  theme_few(base_size = 8) + 
  labs(x='Age', y='Prop. object size correct') +
  theme(legend.position = "none") +
  # geom_smooth(span=10, alpha=smooth_alpha, color='grey') +
  scale_color_viridis(option="D", discrete=FALSE) +
  geom_hline(yintercept =  0 , linetype="dashed", color="grey") 

ggsave('figures/size_classification.pdf', width = 1.5, height = 1.5, units = 'in')

Run inferential stats

Just accuracy as a function of age

accuracy_age_only <- glmer(correct_or_not ~ age_numeric +
                        (1|session_id) +
                        (1|category),
      data = d)
## Warning in glmer(correct_or_not ~ age_numeric + (1 | session_id) + (1
## | : calling glmer() with family=gaussian (identity link) as a shortcut to
## lmer() is deprecated; please call lmer() directly
summary(accuracy_age_only)
## Linear mixed model fit by REML ['lmerMod']
## Formula: correct_or_not ~ age_numeric + (1 | session_id) + (1 | category)
##    Data: d
## 
## REML criterion at convergence: 26541
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9131 -0.7692 -0.3712  1.0259  2.4535 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  session_id (Intercept) 0.01010  0.1005  
##  category   (Intercept) 0.02744  0.1657  
##  Residual               0.18161  0.4262  
## Number of obs: 22272, groups:  session_id, 7032; category, 48
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 0.175348   0.025241   6.947
## age_numeric 0.029284   0.001393  21.016
## 
## Correlation of Fixed Effects:
##             (Intr)
## age_numeric -0.295
accuracy_with_drawing_freq <- glmer(correct_or_not ~ scale(age_numeric) +
                          scale(drawing_frequency) + (1|session_id) + 
                        (age_numeric|category),
      data = d,  family="binomial")

accuracy_with_drawing_freq = summary(accuracy_with_drawing_freq)

xtable::xtable(summary(accuracy_with_drawing_freq)$coef, digits=3, caption = "Model coefficients of a GLMM predicting the recognziability of each drawing.")
## % latex table generated in R 3.6.1 by xtable 1.8-4 package
## % Tue Mar 30 11:02:08 2021
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
##   \hline
##  & Estimate & Std. Error & z value & Pr($>$$|$z$|$) \\ 
##   \hline
## (Intercept) & -1.015 & 0.197 & -5.153 & 0.000 \\ 
##   scale(age\_numeric) & 0.319 & 0.032 & 9.827 & 0.000 \\ 
##   scale(drawing\_frequency) & -0.078 & 0.150 & -0.520 & 0.603 \\ 
##    \hline
## \end{tabular}
## \caption{Model coefficients of a GLMM predicting the recognziability of each drawing.} 
## \end{table}

We first examined how classification accuracy varied according to the age of the child who produced each drawing as well as the category that was drawn. As expected, we found that classification accuracy based on these visual features increased steadily with the age of the child producing the drawing (STATS, see Figure XX), validating the expectation that older children’s drawings contain visual features that make them more recognizable. Importantly, we observed this developmental trend for many different categories that varied in the degree to which they are commonly drawn by children (see SI Appendix, Figure XX): for example, some object categories in our dataset are frequently drawn by children (e.g., car, tree, person) and others very infrequently drawn (e.g., cactus, whale, scissors). We formally evaluated this by asking parents of children aged 3-10 years to estimate the frequency with their child draws each category (N=50 parents, ) and directly examining how it affected classification performance in a second generalized linear mixed model (), adding this term as a covariate. We did not observe that classification performance was influenced by drawing frequency (STATS) in fact, many infrequently drawn categories (e.g. piano) had relatively high classification rates, and some frequently drawn categories (e.g. dog), had relatively low classification rates and were more likely to be confused with other similar categories (e.g., other animals). Thus, children increasingly include distinctive visual features of object categories in their drawings across childhood, regardless of whether these are objects that they have significant experience drawing or objects that they may have never drawn before.

correct_subset <- d %>% 
  filter(correct_or_not==1)

log_odds_corr_only <- lmer(log_odds ~ age_numeric +
                          scale(drawing_frequency) +
                        (1|session_id) +
                        (1|category) +  (1|run),
      data =correct_subset)
## boundary (singular) fit: see ?isSingular
xtable::xtable(summary(log_odds_corr_only)$coef, digits=3, caption = "Model coefficients of a GLMM predicting the 'distinctiveness' (i.e. log-odds probability of selecting the correct label) assigned to correctly classified drawings")
## % latex table generated in R 3.6.1 by xtable 1.8-4 package
## % Tue Mar 30 11:02:10 2021
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrr}
##   \hline
##  & Estimate & Std. Error & df & t value & Pr($>$$|$t$|$) \\ 
##   \hline
## (Intercept) & -1.342 & 0.110 & 52.144 & -12.240 & 0.000 \\ 
##   age\_numeric & 0.061 & 0.006 & 3445.384 & 10.468 & 0.000 \\ 
##   scale(drawing\_frequency) & 0.022 & 0.109 & 42.355 & 0.199 & 0.844 \\ 
##    \hline
## \end{tabular}
## \caption{Model coefficients of a GLMM predicting the 'distinctiveness' (i.e. log-odds probability of selecting the correct label) assigned to correctly classified drawings} 
## \end{table}

We thus restricted our analysis to drawings that the model was able to correclty classify (33% of the balanced subset of drawings, N=7468

accuracy_all_covariates <- glmer(correct_or_not ~ scale(avg_tracing_rating)*scale(age_numeric) +
                          scale(draw_duration) +
                          scale(mean_intensity) +
                          scale(num_strokes) +
                        (1|session_id) +
                        (1|category),
      data = d, family="binomial")

accuracy_all_covariates_no_int <- glmer(correct_or_not ~ scale(avg_tracing_rating)+scale(age_numeric) +
                          scale(draw_duration) +
                          scale(mean_intensity) +
                          scale(num_strokes) +
                        (1|session_id) +
                        (1|category),
      data = d, family="binomial")
accuracy_no_age <- glmer(correct_or_not ~ scale(avg_tracing_rating) + scale(draw_duration) +
                          scale(mean_intensity) +
                          scale(num_strokes) +
                        (1|session_id) +
                        (1|category),
      data = d, family="binomial")

accuracy_no_age_or_tracing <- glmer(correct_or_not ~ scale(draw_duration) +
                          scale(mean_intensity) +
                          scale(num_strokes) +
                        (1|session_id) +
                        (1|category),
      data = d, family="binomial")

accuracy_no_tracing <- glmer(correct_or_not ~ scale(age_numeric) +
                          scale(draw_duration) +
                          scale(mean_intensity) +
                          scale(num_strokes) +
                        (1|session_id) +
                        (1|category),
      data = d, family="binomial")
###
null = r.squaredGLMM(accuracy_no_age_or_tracing)
## Warning: 'r.squaredGLMM' now calculates a revised statistic. See the help
## page.
## Warning: The null model is correct only if all variables used by the
## original model remain unchanged.
no_age = r.squaredGLMM(accuracy_no_age)
## Warning: The null model is correct only if all variables used by the
## original model remain unchanged.
no_tracing = r.squaredGLMM(accuracy_no_tracing)
## Warning: The null model is correct only if all variables used by the
## original model remain unchanged.
all = r.squaredGLMM(accuracy_all_covariates)
## Warning: The null model is correct only if all variables used by the
## original model remain unchanged.
no_int = r.squaredGLMM(accuracy_all_covariates_no_int) # no_int = no interaction between tracing/age
## Warning: The null model is correct only if all variables used by the
## original model remain unchanged.
all_covariates_accuracy <- glmer(correct_or_not ~ scale(age_numeric)* scale(drawing_frequency)+ scale(avg_tracing_rating) + scale(draw_duration) +
                          scale(mean_intensity) +
                          scale(num_strokes) +
                        (1|session_id) +
                        (1|category),
      data = d, family="binomial")
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl =
## control$checkConv, : Model failed to converge with max|grad| = 0.00549854
## (tol = 0.001, component 1)
all_covariates_log_odds <- lmer(log_odds ~ scale(age_numeric)+ scale(avg_tracing_rating) + scale(draw_duration) +
                          scale(mean_intensity) +
                          scale(num_strokes) +
                          scale(drawing_frequency) + 
                        (1|session_id) +
                        (1|category),
      data = d)


xtable::xtable(summary(all_covariates_accuracy)$coef, digits=3, caption = "Model coefficients of a GLMM predicting whetjery of selecting the correct label) assigned to correctly classified drawings")
## % latex table generated in R 3.6.1 by xtable 1.8-4 package
## % Tue Mar 30 11:05:13 2021
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrr}
##   \hline
##  & Estimate & Std. Error & z value & Pr($>$$|$z$|$) \\ 
##   \hline
## (Intercept) & -1.015 & 0.199 & -5.098 & 0.000 \\ 
##   scale(age\_numeric) & 0.238 & 0.020 & 11.904 & 0.000 \\ 
##   scale(drawing\_frequency) & -0.132 & 0.199 & -0.662 & 0.508 \\ 
##   scale(avg\_tracing\_rating) & 0.262 & 0.020 & 12.868 & 0.000 \\ 
##   scale(draw\_duration) & 0.068 & 0.021 & 3.219 & 0.001 \\ 
##   scale(mean\_intensity) & -0.071 & 0.021 & -3.412 & 0.001 \\ 
##   scale(num\_strokes) & 0.016 & 0.018 & 0.886 & 0.376 \\ 
##   scale(age\_numeric):scale(drawing\_frequency) & 0.002 & 0.017 & 0.119 & 0.905 \\ 
##    \hline
## \end{tabular}
## \caption{Model coefficients of a GLMM predicting whetjery of selecting the correct label) assigned to correctly classified drawings} 
## \end{table}
out_log_odds = round(summary(all_covariates_log_odds)$coef,3)